AFT Data Generating Mechanism and Subgroup Analysis

Simulation Study Using GBSG Data

Author

Your Name

Published

October 30, 2025

Show code
# Set options
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = 'center',
  fig.retina = 2
)

# Load required libraries
library(survival)
library(knitr)
library(kableExtra)
library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(gridExtra)

library(weightedsurv)

library(forestsearch)
library(doFuture)
library(doRNG)

# Set theme for plots
theme_set(theme_minimal(base_size = 12))

# library(devtools)
# install_github("larry-leon/forestsearch")
# install_github("larry-leon/weightedsurv")

library(forestsearch)

1 Introduction

1.1 Study Overview

This document presents a comprehensive simulation study using the German Breast Cancer Study Group (GBSG) dataset. We demonstrate:

  1. Flexible subgroup definition using quantile-based cutpoints
  2. Calibration of interaction effects to achieve target hazard ratios
  3. Sensitivity analysis of treatment-subgroup interactions
  4. Forest search methodology for subgroup identification
  5. Bootstrap inference for subgroup stability

1.2 Data Description

The GBSG dataset contains information from a randomized clinical trial of adjuvant therapy for node-positive breast cancer patients.

Show code
# Load GBSG data
data(cancer)

# Dataset summary
data_summary <- data.frame(
  Variable = names(gbsg),
  Type = sapply(gbsg, class),
  Missing = sapply(gbsg, function(x) sum(is.na(x))),
  Unique = sapply(gbsg, function(x) length(unique(x))),
  Description = c(
    "Patient ID",
    "Hormonal therapy (0=no, 1=yes)",
    "Age in years",
    "Menopausal status (0=pre, 1=post)",
    "Tumor size (mm)",
    "Tumor grade (1,2,3)",
    "Number of positive nodes",
    "Progesterone receptor (fmol/l)",
    "Estrogen receptor (fmol/l)",
    "Recurrence-free survival time (days)",
    "Censoring status (0=censored, 1=event)"
  )
)

kable(data_summary, row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
GBSG Dataset Overview
Variable Type Missing Unique Description
pid integer 0 686 Patient ID
age integer 0 54 Hormonal therapy (0=no, 1=yes)
meno integer 0 2 Age in years
size integer 0 58 Menopausal status (0=pre, 1=post)
grade integer 0 3 Tumor size (mm)
nodes integer 0 30 Tumor grade (1,2,3)
pgr integer 0 242 Number of positive nodes
er integer 0 244 Progesterone receptor (fmol/l)
hormon integer 0 2 Estrogen receptor (fmol/l)
rfstime integer 0 574 Recurrence-free survival time (days)
status integer 0 2 Censoring status (0=censored, 1=event)
Show code
# Create distribution plots
p1 <- ggplot(gbsg, aes(x = age)) +
  geom_histogram(bins = 30, fill = "steelblue", alpha = 0.7) +
  labs(title = "Age Distribution", x = "Age (years)", y = "Count")

p2 <- ggplot(gbsg, aes(x = log1p(er))) +
  geom_histogram(bins = 30, fill = "darkgreen", alpha = 0.7) +
  labs(title = "Estrogen Receptor (log scale)", x = "log(ER + 1)", y = "Count")

p3 <- ggplot(gbsg, aes(x = log1p(pgr))) +
  geom_histogram(bins = 30, fill = "darkred", alpha = 0.7) +
  labs(title = "Progesterone Receptor (log scale)", x = "log(PGR + 1)", y = "Count")

p4 <- ggplot(gbsg, aes(x = factor(meno), fill = factor(hormon))) +
  geom_bar(position = "dodge", alpha = 0.7) +
  scale_fill_manual(values = c("0" = "coral", "1" = "steelblue"),
                    labels = c("No Hormone", "Hormone")) +
  labs(title = "Treatment by Menopausal Status", 
       x = "Menopausal Status", y = "Count", fill = "Treatment")

grid.arrange(p1, p2, p3, p4, ncol = 2)

Distribution of Key Variables

2 AFT Data Generating Mechanism

2.1 Model Specification

We use an Accelerated Failure Time (AFT) model with Weibull distribution:

\[\log(T) = \mu + \gamma' X + \sigma \epsilon\]

where:

  • \(T\) is the survival time
  • \(\mu\) is the intercept
  • \(\gamma\) contains the covariate effects including treatment and interaction
  • \(X\) includes covariates and treatment×subgroup interaction
  • \(\sigma\) is the scale parameter
  • \(\epsilon\) follows an extreme value distribution

2.2 Subgroup Definition

We define a subgroup based on two characteristics:

  1. Low estrogen receptor (ER): ER ≤ 25th percentile
  2. Pre-menopausal status: meno = 0
Show code
# Calculate ER quantile cutpoint
er_quantile <- 0.2612616  # Approximately 25th percentile
er_cutpoint <- quantile(gbsg$er, probs = er_quantile)

# Create subgroup indicator
gbsg$subgroup <- with(gbsg, (er <= er_cutpoint) & (meno == 0))

# Subgroup summary
subgroup_summary <- data.frame(
  Criterion = c("ER ≤ 25th percentile", "Pre-menopausal", "Both conditions"),
  N = c(sum(gbsg$er <= er_cutpoint), 
        sum(gbsg$meno == 0),
        sum(gbsg$subgroup)),
  Proportion = c(mean(gbsg$er <= er_cutpoint),
                 mean(gbsg$meno == 0),
                 mean(gbsg$subgroup))
)

kable(subgroup_summary, 
      col.names = c("Criterion", "N", "Proportion"),
      digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Criterion N Proportion
ER ≤ 25th percentile 179 0.261
Pre-menopausal 290 0.423
Both conditions 84 0.122

3 Calibrating Interaction Effects

3.1 Finding k_inter for Target Hazard Ratio

We calibrate the interaction parameter k_inter to achieve specific hazard ratios in the harm subgroup.

Show code
# Transform time to months
df_gbsg <- gbsg
df_gbsg$tte <- with(gbsg, rfstime/30.4375)

# Find k_inter for HR_harm = 2.0
result <- forestsearch::find_k_inter_for_target_hr(
  target_hr_harm = 2.0,
  data = gbsg,
  outcome_var = "rfstime",
  event_var = "status",
  treatment_var = "hormon",
  continuous_vars = c("age", "er", "pgr"),
  factor_vars = c("meno", "grade"),
  subgroup_vars = c("er", "meno"),
  subgroup_cuts = list(
    er = list(type = "quantile", value = er_quantile),
    meno = 0
  ),
  k_treat = 1.0,
  verbose = FALSE
)

calibration_results <- data.frame(
  Target_HR = 2.0,
  Optimal_k_inter = round(result$k_inter, 4),
  Achieved_HR = round(result$achieved_hr_harm, 4),
  Error = round(result$error, 6)
)

kable(calibration_results, 
      col.names = c("Target HR", "Optimal k_inter", "Achieved HR", "Error")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Target HR Optimal k_inter Achieved HR Error
treat 2 1.3009 2.0002 0.000162

3.2 Sensitivity Analysis

We examine how k_inter affects the hazard ratios across different subgroups.

Show code
# Define base parameters
base_params <- list(
  data = df_gbsg,
  continuous_vars = c("age", "size", "nodes", "pgr", "er"),
  factor_vars = c("meno", "grade"),
  outcome_var = "tte",
  event_var = "status",
  treatment_var = "hormon",
  subgroup_vars = c("er", "meno"),
  subgroup_cuts = list(
    er = list(type = "quantile", value = er_quantile),
    meno = 0
  ),
  k_treat = 1.0,
  n_super = 5000
)

# Run sensitivity analysis
sensitivity_results <- do.call(sensitivity_analysis_k_inter, c(
  list(
    k_inter_range = c(-1.5, 1.5),
    n_points = 11,
    model = "alt"
  ),
  base_params
))
Running sensitivity analysis...

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |======================================================================| 100%

Sensitivity of Hazard Ratios to Interaction Parameter
Show code
# Create long format for plotting
sens_long <- sensitivity_results %>%
  select(k_inter, hr_harm, hr_no_harm, hr_overall) %>%
  pivot_longer(cols = -k_inter, names_to = "Group", values_to = "HR") %>%
  mutate(Group = case_when(
    Group == "hr_harm" ~ "Harm Subgroup",
    Group == "hr_no_harm" ~ "No-Harm Subgroup",
    Group == "hr_overall" ~ "Overall"
  ))

# Plot sensitivity
ggplot(sens_long, aes(x = k_inter, y = HR, color = Group)) +
  geom_line(size = 1.2) +
  geom_point(size = 3) +
  geom_hline(yintercept = 1, linetype = "dashed", alpha = 0.5) +
  geom_vline(xintercept = result$k_inter, linetype = "dotted", 
             color = "red", alpha = 0.7) +
  scale_color_manual(values = c("Harm Subgroup" = "darkred",
                               "No-Harm Subgroup" = "darkblue",
                               "Overall" = "darkgreen")) +
  labs(title = "Sensitivity Analysis: Hazard Ratios vs Interaction Parameter",
       subtitle = paste("Red line indicates k_inter =", round(result$k_inter, 3), 
                       "for target HR = 2.0"),
       x = "Interaction Parameter (k_inter)",
       y = "Hazard Ratio",
       color = "Population") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

Sensitivity of Hazard Ratios to Interaction Parameter

4 Simulation Studies

4.1 Null Scenario (No Interaction)

Show code
# Generate DGM with no interaction
dgm_null <- generate_aft_dgm_flex(
  data = df_gbsg,
  n_super = 5000,
  continuous_vars = c("age", "er", "pgr"),
  factor_vars = c("meno", "grade"),
  outcome_var = "tte",
  event_var = "status",
  treatment_var = "hormon",
  subgroup_vars = c("er", "meno"),
  subgroup_cuts = list(
    er = list(type = "quantile", value = er_quantile),
    meno = 0
  ),
  model = "alt",
  k_inter = 0.0,
  verbose = FALSE
)

# Simulate data
set.seed(123)
draw_null <- simulate_from_dgm(
  dgm = dgm_null, 
  n = 700, 
  rand_ratio = 1, 
  draw_treatment = TRUE, 
  max_follow = Inf, 
  seed = 123
)

# Summary statistics
null_summary <- data.frame(
  Metric = c("Sample Size", "Event Rate", "Treatment Allocation", 
             "Subgroup Size", "Subgroup Proportion"),
  Value = c(nrow(draw_null),
            mean(draw_null$event_sim),
            mean(draw_null$treat_sim),
            sum(draw_null$flag_harm),
            mean(draw_null$flag_harm))
)

kable(null_summary, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Metric Value
Sample Size 700.000
Event Rate 0.389
Treatment Allocation 0.500
Subgroup Size 87.000
Subgroup Proportion 0.124
Show code
# Prepare data for KM plot
dfcount_null <- df_counting(
  df = draw_null, 
  tte.name = "y_sim", 
  event.name = "event_sim", 
  treat.name = "treat_sim"
)

# Create KM plot
par(mfrow=c(1,1))
plot_weighted_km(dfcount_null, conf.int = TRUE, show.logrank = TRUE, ymax = 1.05)
title(main = "Null Scenario: No Treatment-Subgroup Interaction")

Kaplan-Meier Curves: Null Scenario

4.2 Alternative Scenario (With Interaction)

Show code
# Generate DGM with calibrated interaction
dgm_alt <- generate_aft_dgm_flex(
  data = df_gbsg,
  n_super = 5000,
  continuous_vars = c("age", "er", "pgr"),
  factor_vars = c("meno", "grade"),
  outcome_var = "tte",
  event_var = "status",
  treatment_var = "hormon",
  subgroup_vars = c("er", "meno"),
  subgroup_cuts = list(
    er = list(type = "quantile", value = er_quantile),
    meno = 0
  ),
  model = "alt",
  k_inter = result$k_inter,
  verbose = FALSE
)

# Simulate data
set.seed(123)
draw_alt <- simulate_from_dgm(
  dgm = dgm_alt, 
  n = 700, 
  rand_ratio = 1,
  max_follow = Inf, 
  seed = 123
)

# Summary statistics
alt_summary <- data.frame(
  Metric = c("Sample Size", "Event Rate", "Treatment Allocation", 
             "Subgroup Size", "Subgroup Proportion"),
  Value = c(nrow(draw_alt),
            mean(draw_alt$event_sim),
            mean(draw_alt$treat_sim),
            sum(draw_alt$flag_harm),
            mean(draw_alt$flag_harm))
)

kable(alt_summary, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Metric Value
Sample Size 700.000
Event Rate 0.419
Treatment Allocation 0.500
Subgroup Size 87.000
Subgroup Proportion 0.124
Show code
# Prepare data for KM plot
dfcount_alt <- df_counting(
  df = draw_alt, 
  tte.name = "y_sim", 
  event.name = "event_sim", 
  treat.name = "treat_sim"
)

# Create KM plot
par(mfrow=c(1,1))
plot_weighted_km(dfcount_alt, conf.int = TRUE, show.logrank = TRUE, ymax = 1.05)
title(main = "Alternative Scenario: With Treatment-Subgroup Interaction")

Kaplan-Meier Curves: Alternative Scenario

5 Forest Search Analysis

5.1 Methodology

Forest search is a systematic approach to identify subgroups with differential treatment effects using:

  1. Recursive partitioning to explore covariate space
  2. Hazard ratio thresholds to identify meaningful effects
  3. Consistency criteria to ensure stability
  4. Bootstrap inference for uncertainty quantification

5.2 Forest Search on Alternative Scenario

Show code
# Define confounders for forest search
confounders.name <- c("z_age", "z_er", "z_pgr", "z_meno", 
                      "z_grade_1", "z_grade_2", "size", "nodes")

# Setup parallel processing
library(doFuture)
library(doRNG)

registerDoFuture()
registerDoRNG()

df_null <- as.data.frame(draw_null)

system.time({fs <- forestsearch(df_null,  confounders.name=confounders.name,
                                outcome.name = "y_sim", treat.name = "treat_sim", event.name = "event_sim", id.name = "id",
                                potentialOutcome.name = "loghr_po", flag_harm.name = "flag_harm",
                                hr.threshold = 1.25, hr.consistency = 1.0, pconsistency.threshold = 0.90,
                                sg_focus = "hrMaxSG",
                                showten_subgroups = FALSE, details=TRUE,
                                conf_force = c("z_age <= 65", "z_er <= 0", "z_er <= 1", "z_er <= 2","z_er <= 5"),
                                cut_type = "default", use_grf = TRUE, plot.grf = TRUE, use_lasso = TRUE,
                                maxk = 2, n.min = 60, d0.min = 12, d1.min = 12,
                                plot.sg = TRUE, by.risk = 12,
                                parallel_args = list(plan="callr", workers = 12, show_message = TRUE)
)
})
GRF stage for cut selection with dmin,tau= 12 0.6 
tau, maxdepth = 52.8205 2 
   leaf.node control.mean control.size control.se depth
1          2        -4.29       692.00       1.22     1
11         4        -5.86       457.00       1.49     2
3          6         8.25        68.00       3.73     2
4          7        -5.85       154.00       2.59     2

Selected subgroup:
  leaf.node control.mean control.size control.se depth
3         6         8.25        68.00       3.73     2

GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "size <= 25"

All splits:
[1] "size <= 80" "nodes <= 5" "size <= 45" "size <= 25"
# of continuous/categorical characteristics 5 3 
Continuous characteristics: z_age z_er z_pgr size nodes 
Categorical characteristics: z_meno z_grade_1 z_grade_2 
## Prior to lasso: z_age z_er z_pgr size nodes 
#### Lasso selection results 
8 x 1 sparse Matrix of class "dgCMatrix"
                     s0
z_age      .           
z_er      -0.0001676937
z_pgr     -0.0011160165
z_meno     0.1577286538
z_grade_1 -0.7152106695
z_grade_2  .           
size       0.0008849282
nodes      .           
Cox-LASSO selected: z_er z_pgr z_meno z_grade_1 size 
Cox-LASSO not selected: z_age z_grade_2 nodes 
### End Lasso selection 
## After lasso: z_er z_pgr size 
Default cuts included from Lasso: z_er <= mean(z_er) z_er <= median(z_er) z_er <= qlow(z_er) z_er <= qhigh(z_er) z_pgr <= mean(z_pgr) z_pgr <= median(z_pgr) z_pgr <= qlow(z_pgr) z_pgr <= qhigh(z_pgr) size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) 
Categorical after Lasso: z_meno z_grade_1 
Factors per GRF: size <= 80 nodes <= 5 size <= 45 size <= 25 
Initial GRF cuts included size <= 80 nodes <= 5 size <= 45 size <= 25 
Factors included per GRF (not in lasso) size <= 80 nodes <= 5 size <= 45 size <= 25 

===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 22 cut expressions once and caching...
Cut evaluation summary:
  Total cuts:  22 
  Valid cuts:  22 
  Errors:  0 
✓ All 22 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====

# of candidate subgroup factors= 22 
 [1] "size <= 80"     "nodes <= 5"     "size <= 45"     "size <= 25"    
 [5] "z_age <= 65"    "z_er <= 0"      "z_er <= 1"      "z_er <= 2"     
 [9] "z_er <= 5"      "z_er <= 97.1"   "z_er <= 38"     "z_er <= 9"     
[13] "z_er <= 117"    "z_pgr <= 111.1" "z_pgr <= 33.5"  "z_pgr <= 8"    
[17] "z_pgr <= 135"   "size <= 29.7"   "size <= 20"     "size <= 35"    
[21] "z_meno"         "z_grade_1"     
Number of factors evaluated= 22 
Confounders per grf screening q2 q16 q9 q21 q19 q14 q3 q15 q11 q12 q20 q18 q10 q17 q4 q13 q6 q5 q8 q7 q22 q1 
          Factors Labels VI(grf)
2      nodes <= 5     q2  0.1282
16     z_pgr <= 8    q16  0.1120
9       z_er <= 5     q9  0.0721
21         z_meno    q21  0.0666
19     size <= 20    q19  0.0624
14 z_pgr <= 111.1    q14  0.0575
3      size <= 45     q3  0.0569
15  z_pgr <= 33.5    q15  0.0493
11     z_er <= 38    q11  0.0464
12      z_er <= 9    q12  0.0431
20     size <= 35    q20  0.0431
18   size <= 29.7    q18  0.0386
10   z_er <= 97.1    q10  0.0383
17   z_pgr <= 135    q17  0.0364
4      size <= 25     q4  0.0362
13    z_er <= 117    q13  0.0350
6       z_er <= 0     q6  0.0230
5     z_age <= 65     q5  0.0165
8       z_er <= 2     q8  0.0164
7       z_er <= 1     q7  0.0142
22      z_grade_1    q22  0.0080
1      size <= 80     q1  0.0000
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 990 
Events criteria: control >= 12 , treatment >= 12 
Subgroup search completed in 0.02 minutes
Found 2 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 2 
# of unique initial candidates: 2 
# Restricting to top stop_Kgroups = 10 
# of candidates restricted to 'top K': 2 
Using parallel processing: callr with 12 workers
*** Not met: Subgroup, % Consistency = !{nodes <= 5} {size <= 29.7} 0.85 
*** Not met: Subgroup, % Consistency = !{nodes <= 5} !{z_pgr <= 33.5} 0.6 
All subgroup evaluations returned NULL
Minutes forestsearch overall = 0.07 
   user  system elapsed 
  7.769   0.243   4.007 
Show code
plan("sequential")
Show code
# Define confounders for forest search
confounders.name <- c("z_age", "z_er", "z_pgr", "z_meno", 
                      "z_grade_1", "z_grade_2", "size", "nodes")

# Setup parallel processing
library(doFuture)
library(doRNG)

registerDoFuture()
registerDoRNG()

df_alt <- as.data.frame(draw_alt)

system.time({fs <- forestsearch(df_alt,  confounders.name=confounders.name,
                                outcome.name = "y_sim", treat.name = "treat_sim", event.name = "event_sim", id.name = "id",
                                hr.threshold = 1.25, hr.consistency = 1.0, pconsistency.threshold = 0.90,
                                potentialOutcome.name = "loghr_po", flag_harm.name = "flag_harm",
                                sg_focus = "hrMaxSG",
                                showten_subgroups = FALSE, details=TRUE,
                                conf_force = c("z_age <= 65", "z_er <= 0", "z_er <= 1", "z_er <= 2","z_er <= 5"),
                                cut_type = "default", use_grf = TRUE, plot.grf = TRUE, use_lasso = TRUE,
                                maxk = 2, n.min = 60, d0.min = 12, d1.min = 12,
                                plot.sg = TRUE, by.risk = 12,
                                parallel_args = list(plan="callr", workers = 12, show_message = TRUE)
)
})
GRF stage for cut selection with dmin,tau= 12 0.6 
tau, maxdepth = 52.8205 2 
   leaf.node control.mean control.size control.se depth
1          2         3.44       124.00       3.27     1
2          3        -3.43       576.00       1.33     1
11         4        -4.38       302.00       1.88     2
21         5         9.60        68.00       3.80     2
4          7        -4.17       285.00       1.89     2

Selected subgroup:
   leaf.node control.mean control.size control.se depth
21         5          9.6         68.0        3.8     2

GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "nodes <= 5"

All splits:
[1] "z_er <= 4"  "size <= 26" "nodes <= 5" "z_er <= 0" 
# of continuous/categorical characteristics 5 3 
Continuous characteristics: z_age z_er z_pgr size nodes 
Categorical characteristics: z_meno z_grade_1 z_grade_2 
## Prior to lasso: z_age z_er z_pgr size nodes 
#### Lasso selection results 
8 x 1 sparse Matrix of class "dgCMatrix"
                     s0
z_age     -0.0008522668
z_er      -0.0003750320
z_pgr     -0.0015055782
z_meno     .           
z_grade_1 -0.7506140627
z_grade_2 -0.0049440538
size       0.0011962601
nodes      .           
Cox-LASSO selected: z_age z_er z_pgr z_grade_1 z_grade_2 size 
Cox-LASSO not selected: z_meno nodes 
### End Lasso selection 
## After lasso: z_age z_er z_pgr size 
Default cuts included from Lasso: z_age <= mean(z_age) z_age <= median(z_age) z_age <= qlow(z_age) z_age <= qhigh(z_age) z_er <= mean(z_er) z_er <= median(z_er) z_er <= qlow(z_er) z_er <= qhigh(z_er) z_pgr <= mean(z_pgr) z_pgr <= median(z_pgr) z_pgr <= qlow(z_pgr) z_pgr <= qhigh(z_pgr) size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) 
Categorical after Lasso: z_grade_1 z_grade_2 
Factors per GRF: z_er <= 4 size <= 26 nodes <= 5 z_er <= 0 
Initial GRF cuts included z_er <= 4 size <= 26 nodes <= 5 z_er <= 0 
Factors included per GRF (not in lasso) z_er <= 4 size <= 26 nodes <= 5 z_er <= 0 

===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 26 cut expressions once and caching...
Cut evaluation summary:
  Total cuts:  26 
  Valid cuts:  26 
  Errors:  0 
✓ All 26 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====

# of candidate subgroup factors= 26 
 [1] "z_er <= 4"      "size <= 26"     "nodes <= 5"     "z_er <= 0"     
 [5] "z_age <= 65"    "z_er <= 1"      "z_er <= 2"      "z_er <= 5"     
 [9] "z_age <= 53.4"  "z_age <= 53"    "z_age <= 46"    "z_age <= 61"   
[13] "z_er <= 97.1"   "z_er <= 38"     "z_er <= 9"      "z_er <= 117"   
[17] "z_pgr <= 111.1" "z_pgr <= 33.5"  "z_pgr <= 8"     "z_pgr <= 135"  
[21] "size <= 29.7"   "size <= 25"     "size <= 20"     "size <= 35"    
[25] "z_grade_1"      "z_grade_2"     
Number of factors evaluated= 26 
Confounders per grf screening q3 q12 q10 q9 q4 q1 q26 q11 q18 q24 q6 q23 q14 q15 q21 q19 q17 q13 q16 q7 q20 q2 q8 q22 q5 q25 
          Factors Labels VI(grf)
3      nodes <= 5     q3  0.0958
12    z_age <= 61    q12  0.0865
10    z_age <= 53    q10  0.0671
9   z_age <= 53.4     q9  0.0640
4       z_er <= 0     q4  0.0618
1       z_er <= 4     q1  0.0520
26      z_grade_2    q26  0.0462
11    z_age <= 46    q11  0.0461
18  z_pgr <= 33.5    q18  0.0388
24     size <= 35    q24  0.0386
6       z_er <= 1     q6  0.0381
23     size <= 20    q23  0.0374
14     z_er <= 38    q14  0.0373
15      z_er <= 9    q15  0.0371
21   size <= 29.7    q21  0.0326
19     z_pgr <= 8    q19  0.0318
17 z_pgr <= 111.1    q17  0.0278
13   z_er <= 97.1    q13  0.0275
16    z_er <= 117    q16  0.0242
7       z_er <= 2     q7  0.0236
20   z_pgr <= 135    q20  0.0214
2      size <= 26     q2  0.0191
8       z_er <= 5     q8  0.0160
22     size <= 25    q22  0.0158
5     z_age <= 65     q5  0.0100
25      z_grade_1    q25  0.0036
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 1378 
Events criteria: control >= 12 , treatment >= 12 
Subgroup search completed in 0.03 minutes
Found 71 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 71 
Removed 13 near-duplicate subgroups
Original rows: 71 
After removal: 58 
# of unique initial candidates: 58 
# Restricting to top stop_Kgroups = 10 
# of candidates restricted to 'top K': 10 
Using parallel processing: callr with 12 workers
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= {z_age <= 53} {z_er <= 2} 0.98 
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= {z_age <= 53} {z_er <= 4} 0.99 
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= {z_age <= 53} {z_er <= 1} 0.98 
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= {z_age <= 61} {z_er <= 0} 0.96 
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= {z_age <= 53} {z_er <= 5} 0.98 
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= {z_er <= 0} {z_age <= 65} 0.94 
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= {z_age <= 53} {z_er <= 9} 0.98 
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= !{nodes <= 5} {size <= 29.7} 0.91 
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= {z_er <= 0} 0.9 
Consistency met!
# of splits = 1000 
**** Subgroup, % Consistency Met= {z_age <= 61} {z_er <= 1} 0.93 

*** Subgroup found: {z_age <= 53} {z_er <= 9} 
% consistency criteria met= 0.98 
SG focus= hrMaxSG 
Subgroup Consistency Minutes= 0.0489 
Subgroup found (FS) with sg_focus='hrMaxSG'
Selected subgroup: {z_age <= 53} & {z_er <= 9} 
Minutes forestsearch overall = 0.08 
   user  system elapsed 
 27.413   1.199   5.014 
Show code
with(fs$df.est, table(flag_harm, treat.recommend))
         treat.recommend
flag_harm   0   1
        0  21 592
        1  87   0
Show code
# Extract results
res_tabs <- sg_tables(fs, ndecimals = 3)

res_tabs$sg10_out
Identified Subgroups
Two-factor subgroups (maxk=2)
Factor 1 Factor 2 N Events E₁ HR Pcons
{z_age <= 53} {z_er <= 9} 108 61 31 1.919 0.980
{z_age <= 53} {z_er <= 5} 94 56 30 2.056 0.980
{z_age <= 61} {z_er <= 1} 79 46 28 1.767 0.930
{z_age <= 53} {z_er <= 4} 77 46 29 2.309 0.990
!{nodes <= 5} {size <= 29.7} 77 37 23 1.814 0.910
{z_er <= 0} 73 39 24 1.784 0.900
{z_er <= 0} {z_age <= 65} 67 35 21 1.962 0.940
{z_age <= 53} {z_er <= 2} 66 37 23 2.396 0.980
{z_age <= 53} {z_er <= 1} 64 36 22 2.302 0.980
{z_age <= 61} {z_er <= 0} 62 33 20 2.162 0.960
Search Configuration: Single-factor candidates (L) = 52; Maximum combinations evaluated = 1,378; Search depth (maxk) = 2
Search Results: Candidate subgroups found = 71; Maximum HR estimate = 2.4
Note: E₁ = events in treatment arm; Pcons = consistency proportion
Show code
res_tabs$tab_estimates
Treatment Effect Estimates
Training data estimates
Subgroup n n1 events m1 m0 RMST HR (95% CI) AHR(po)
ITT 700 (100.0%) 350 (50.0%) 293 (41.9%) 67.9 50 4.4 0.80 (0.64, 1.01) 0.72
Questionable 108 (15.4%) 45 (41.7%) 61 (56.5%) 36.4 46.5 -8.5 1.92 (1.17, 3.16) 1.67
Recommend 592 (84.6%) 305 (51.5%) 232 (39.2%) 76.7 53.5 8.1 0.70 (0.54, 0.90) 0.62
Show code
plan("sequential")

5.3 Bootstrap Inference

Show code
output_dir <- "results/"
save_results <- dir.exists(output_dir)

# patchhwork needed for a combined bootstrap plot (otherwise if not avaialable will not produce)
library(patchwork)

# Number of bootstrap samples
NB <- 1000

# Run bootstrap
t.start <- proc.time()[3]

fs_bc <- forestsearch_bootstrap_dofuture(
  fs.est = fs, 
  nb_boots = NB, 
  show_three = FALSE, 
  details = TRUE)
Ystar matrix generated should be 'boots x N': 1000 x 700

=== Bootstrap Analysis Complete ===
Success rate: 95.4% (954/1000)

H (Questionable) Estimates:
  Unadjusted:       1.92 (1.17,3.16) 
  Bias-corrected:  1.55 (0.84,2.85) 

Hc (Recommend) Estimates:
  Unadjusted:       0.7 (0.54,0.9) 
  Bias-corrected:  0.72 (0.49,1.06) 
===================================
Show code
plan("sequential")

t.min <- (proc.time()[3] - t.start) / 60

if (save_results) {
    filename <- file.path(output_dir, 
                         paste0("sim_gbsg_example_B=", 
                                format(NB), 
                                ".RData"))
    save(fs_bc, fs, file = filename)
    cat("\nResults saved to:", filename, "\n")
}

Results saved to: results//sim_gbsg_example_B=1000.RData 
Show code
#load("results//sim_gbsg_example_B=1000.RData")

summaries <- summarize_bootstrap_results(
      sgharm = fs$sg.harm,
      boot_results = fs_bc,
      create_plots = TRUE,
      est.scale = "log.hr"
    )

═══════════════════════════════════════════════════════════════
           BOOTSTRAP ANALYSIS SUMMARY                          
═══════════════════════════════════════════════════════════════

BOOTSTRAP SUCCESS METRICS:
─────────────────────────────────────────────────────────────
  Total iterations:              1000
  Successful subgroup ID:        954 (95.4%)
  Failed to find subgroup:       46 (4.6%)

TIMING ANALYSIS:
─────────────────────────────────────────────────────────────
Overall:
  Total bootstrap time:          27.91 minutes (0.47 hours)
  Average per iteration:         0.03 min (1.7 sec)

Per-iteration timing:
  Mean:                          0.33 min (19.5 sec)
  Median:                        0.33 min (19.8 sec)
  Std Dev:                       0.04 minutes
  Range:                         [0.05, 0.39] minutes
  IQR:                           [0.32, 0.35] minutes

ForestSearch timing (successful iterations only):
  Iterations with FS:            1000 (100.0%)
  Mean FS time:                  0.33 min (19.5 sec)
  Median FS time:                0.33 min (19.8 sec)
  Total FS time:                 325.24 minutes
  FS time % of total:            1165.2%

Overhead timing (Cox models, bias correction, etc.):
  Mean overhead:                 0.00 min (0.0 sec)
  Median overhead:               0.00 min (0.0 sec)
  Total overhead:                0.18 minutes
  Overhead % of total:           0.6%

PERFORMANCE ASSESSMENT:
─────────────────────────────────────────────────────────────
  Performance rating:            ✓✓✓ Excellent
  Average iteration speed:       1.7 seconds

═══════════════════════════════════════════════════════════════
Show code
sg_tab <- summaries$table

sg_tab
Treatment Effect by Subgroup
Bootstrap bias-corrected estimates (1000 iterations)
Subgroup
Sample Size
Survival
Treatment Effect
AHR(po)
N NT Events MedT MedC RMSTd HR
(95% CI)1
HR
(95% CI)2
Qstnbl 108 (15.4%) 45 (41.7%) 61 (56.5%) 36.4 46.5 -8.5 1.92 (1.17, 3.16) 1.55 (0.84,2.85) 1.67
Recmnd 592 (84.6%) 305 (51.5%) 232 (39.2%) 76.7 53.5 8.1 0.70 (0.54, 0.90) 0.72 (0.49,1.06) 0.62
1 Unadjusted HR: Standard Cox regression hazard ratio with robust standard errors
2 Bias-corrected HR: Bootstrap-adjusted estimate using infinitesimal jacknife method (1000 iterations). Corrects for optimism in subgroup selection.
Note: Med = Median survival time (months). RMSTd = Restricted mean survival time difference. Subgroup identified in 95.4% of bootstrap samples.
Show code
event_summary <- summarize_bootstrap_events(fs_bc, threshold = 12)

=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 1000
Event threshold: <12 events

ORIGINAL Subgroup H on BOOTSTRAP samples:
  Control arm <12 events: 0 (0.0%)
  Treatment arm <12 events: 0 (0.0%)
  Either arm <12 events: 0 (0.0%)

ORIGINAL Subgroup Hc on BOOTSTRAP samples:
  Control arm <12 events: 0 (0.0%)
  Treatment arm <12 events: 0 (0.0%)
  Either arm <12 events: 0 (0.0%)

NEW Subgroups found: 954 (95.4%)

NEW Subgroup H* on ORIGINAL data:
  Control arm <12 events: 0 (0.0% of successful)
  Treatment arm <12 events: 0 (0.0% of successful)
  Either arm <12 events: 0 (0.0% of successful)

NEW Subgroup Hc* on ORIGINAL data:
  Control arm <12 events: 0 (0.0% of successful)
  Treatment arm <12 events: 0 (0.0% of successful)
  Either arm <12 events: 0 (0.0% of successful)
Show code
summaries$diagnostics_table_gt
Bootstrap Diagnostics Summary
Analysis of 1000 bootstrap iterations
Category Metric Value
Success Rate1 Total iterations 1000
Successful subgroup ID 954 (95.4%)
Failed to find subgroup 46 (4.6%)
Success rating Excellent ✓✓✓
Subgroup H (Questionable) Unadjusted estimate 1.92 (1.17, 3.16)
Bias-corrected estimate 1.55 (0.84, 2.85)
Bias correction impact2 19.2%
CI width change3 2.00 → 2.00
Subgroup Hc (Recommend) Unadjusted estimate 0.70 (0.54, 0.90)
Bias-corrected estimate 0.72 (0.49, 1.06)
Bias correction impact2 3.0%
CI width change3 0.36 → 0.57
Bootstrap Quality: H Valid iterations 954
Mean (SD) 0.44 (0.37)
Coefficient of variation4 84.7%
Skewness5 -0.03
Bootstrap Quality: Hc Valid iterations 954
Mean (SD) -0.33 (0.22)
Coefficient of variation4 65.5%
Skewness5 -0.00
Search Performance Mean max HR found 2.91 (0.80)
Mean factors evaluated 46.1
Mean combinations tried 1134
Proportion at maxk
1 Success Rate: Proportion of bootstrap samples where ForestSearch identified a valid subgroup
2 Bias Correction Impact: Percentage change from unadjusted to bias-corrected estimate
3 CI Width Change: Confidence interval width before → after bias correction
4 Coefficient of Variation: Standard deviation as % of mean (lower is better)
5 Skewness: Measure of asymmetry (0 = symmetric, |skew| < 1 is generally good)
Interpretation Guide:

Excellent stability: Subgroup is consistently identified across bootstrap samples.

High variability: Bootstrap estimates are imprecise (CV ≥ 25%). Consider increasing nb_boots or sample size.

Show code
summaries$subgroup_summary$original_agreement
NULL
Show code
summaries$subgroup_summary$agreement
                           Subgroup  K_sg     N Percent_of_successful  Rank
                             <char> <num> <int>                 <num> <int>
  1:                    {z_er <= 1}     1    24             2.5157233     1
  2:    {z_age <= 54} & {z_er <= 5}     2    18             1.8867925     2
  3:    !{nodes <= 5} & {z_grade_2}     2    16             1.6771488     3
  4:        !{z_meno} & {z_er <= 8}     2    15             1.5723270     4
  5:    {z_age <= 53} & {z_er <= 5}     2    13             1.3626834     5
 ---                                                                       
617:   !{size <= 30} & {nodes <= 5}     2     1             0.1048218   617
618: !{nodes <= 8} & {z_pgr <= 138}     2     1             0.1048218   618
619:                    {z_er <= 4}     1     1             0.1048218   619
620:    !{size <= 21} & {z_er <= 5}     2     1             0.1048218   620
621:   !{size <= 44} & !{z_er <= 5}     2     1             0.1048218   621
Show code
summaries$subgroup_summary$factor_presence
    Rank    Factor Count   Percent
   <int>    <char> <int>     <num>
1:     1      z_er   528 55.345912
2:     2     z_age   434 45.492662
3:     3     nodes   291 30.503145
4:     4      size   192 20.125786
5:     5     z_pgr   185 19.392034
6:     6    z_meno   118 12.368973
7:     7 z_grade_2    80  8.385744
8:     8 z_grade_1    16  1.677149
Show code
summaries$subgroup_summary$factor_presence_specific
Empty data.table (0 rows and 3 cols): Factor_Definition,Count,Percent
Show code
fs_bc$summary$plots$combined
NULL